import time
import random
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from bs4 import BeautifulSoup
from HTMLParser import HTMLParser
import pandas as pd
import re
import string
import codecs
import os
from unidecode import unidecode
from joblib import Parallel, delayed
import multiprocessing
import math

# Download number of papers

# Result accumulators; the scraping loop appends to each of them.
ctyCollect = []    # country names
searchUsed = []    # search terms submitted
legCollect = []    # chamber types
numberPapers = []  # number of written papers

# Read the chamber spreadsheet and give its columns short attribute-friendly
# names so they can be accessed as ``df.country``, ``df.name1`` and so on.
df = pd.read_excel('ChamberNames.xlsx', encoding='utf_8').rename(
    columns={
        'Country': 'country',
        'Nationality': 'nationality',
        'Chamber (lower, upper, unicameral)': 'ctype',
        'English name': 'chamber',
        'Chamber name 1': 'name1',
        'Chamber name 2': 'name2',
        'Chamber name 3': 'name3',
        'Chamber name 4': 'name4',
    }
)
# Path to the ChromeDriver binary (on macOS: '/usr/local/bin/chromedriver').
LINUXdriver = "/usr/lib/chromium-browser/chromedriver"  # For Linux
# Web of Science general-search form, reached through the library proxy.
webpage = "http://apps.webofknowledge.com.libproxy.wustl.edu/WOS_GeneralSearch_input.do?product=WOS&search_mode=GeneralSearch&SID=6CUAinyAisI8OALbXGS&preferencesSaved="


def _build_search(country, nationality, chamber, names, typeLeg):
    """Return the search string for one chamber.

    ``names`` holds the four alternative chamber names in spreadsheet order;
    the placeholder 'NAA' marks a missing one.  Names are used up to and
    including the last one that is not 'NAA' (the first name is always used).
    Upper chambers additionally match ``senate``; every other chamber type
    matches ``parliament`` or ``"chamber of deputies"``.
    """
    used = 1
    for position in range(len(names), 1, -1):
        if names[position - 1] != 'NAA':
            used = position
            break
    quoted = ['"' + term + '"' for term in [chamber] + list(names[:used])]
    if typeLeg == 'upper':
        generic = 'senate'
    else:
        generic = 'parliament OR "chamber of deputies"'
    return ('(' + country + ' OR ' + nationality + ')'
            + ' AND (' + ' OR '.join(quoted) + ' OR ' + generic + ')')


def _submit_search(driver, search):
    """Open the search form, fill it in with ``search`` and submit it.

    Any Selenium exception raised while locating or using a form element is
    left to propagate: a broken form means no count can be trusted.
    """
    driver.get(webpage)

    # Type the search string into the first search-criteria input.
    elem = driver.find_element_by_name("WOS_GeneralSearch_input_form")
    elem = elem.find_element_by_class_name("block-search")
    elem = elem.find_element_by_class_name("block-search-content")
    elem = elem.find_element_by_class_name("search-criteria")
    elem = elem.find_element_by_class_name("search-criteria-list")
    elem = elem.find_element_by_tag_name("table")
    elem = elem.find_element_by_tag_name("tbody")
    elem = elem.find_element_by_class_name("search-criteria-cell1")
    elem = elem.find_element_by_tag_name("div")
    elem = elem.find_element_by_tag_name("input")
    elem.clear()
    elem.send_keys(search)

    # Select database: expand the settings panel and click the 1st, 3rd and
    # 4th edition checkboxes.
    driver.find_element_by_xpath('//*[@title="Show/Hide more settings"]').click()
    editions = driver.find_element_by_xpath('//*[@aria-label="Editions to include from Web of Science Core Collection: Citation Indexes "]')
    boxes = editions.find_elements_by_tag_name("input")
    for position in (0, 2, 3):
        boxes[position].click()

    # Define the period: custom timespan starting in 1990.
    form = driver.find_element_by_id('WOS_GeneralSearch_input_form')
    timespan = form.find_element_by_id('timespan')
    selection = timespan.find_elements_by_tag_name('div')[3].find_element_by_class_name('selection')
    selection.click()
    start = Select(selection.find_element_by_xpath('//*[@aria-label="Custom timespan range start"]'))
    start.select_by_value('1990')
    driver.find_element_by_class_name('select2-search__field').send_keys(1990)

    # Click Search
    driver.find_element_by_class_name("searchButton").click()


def _political_science_count(driver):
    """Return the digits of the POLITICAL SCIENCE entry, or None.

    Clicks the second "More Refine Results" link when the page has several,
    otherwise the only one, then takes the first element whose text contains
    'POLITICAL SCIENCE' and splits into exactly three tokens.  None is
    returned when there is no such link, no such element, or the element's
    text holds no digit.
    """
    links = driver.find_elements_by_xpath('//*[@title="More Refine Results"]')
    if not links:
        return None
    if len(links) > 1:
        links[1].click()
    else:
        links[0].click()
    for cell in driver.find_elements_by_xpath("//*[contains(text(), 'POLITICAL SCIENCE')]"):
        text = cell.text
        if len(text.split()) == 3:
            digits = ''.join(c for c in text if c.isdigit())
            return digits or None
    return None


for i in range(0, len(df)):
    country = df.country[i]
    typeLeg = df.ctype[i]
    chamber = unidecode(''.join(df.chamber[i]))
    nationality = unidecode(''.join(df.nationality[i]))
    names = [unidecode(''.join(df[column][i]))
             for column in ('name1', 'name2', 'name3', 'name4')]
    search = _build_search(country, nationality, chamber, names, typeLeg)

    # Open Chrome; the try/finally guarantees the browser and its
    # chromedriver process are shut down whatever happens in between.
    driver = webdriver.Chrome(LINUXdriver)
    try:
        _submit_search(driver, search)
        # time.sleep(random.uniform(3, 6))

        # Defaults describe a failed lookup: zero articles are stored and the
        # sentinel is what the progress message shows.
        papers = -999999999
        articles = 0
        try:
            digits = _political_science_count(driver)
            if digits is not None:
                articles = int(digits)
                papers = digits
        except Exception as err:
            # Best effort: a page without the expected result widgets counts
            # as zero articles, but say why instead of failing silently.
            # KeyboardInterrupt/SystemExit are deliberately not caught.
            print('Could not read the result count for %s (%s): %r'
                  % (country, typeLeg, err))

        # Store results -- exactly one entry per list so they stay aligned.
        ctyCollect.append(country)
        legCollect.append(typeLeg)
        searchUsed.append(search)
        numberPapers.append(articles)
        print('Done with %s from %s. It has %s' % (country, typeLeg, papers))
    finally:
        # Close Browser
        driver.quit()

# Export results to .csv: one row per chamber processed, built from the four
# accumulator lists, written UTF-8 encoded to ByChamberPapers.csv.
chamberPapers = pd.DataFrame(
    dict(
        country=ctyCollect,
        typechamber=legCollect,
        search=searchUsed,
        articles=numberPapers,
    )
)
chamberPapers.to_csv("ByChamberPapers.csv", encoding='utf-8')

